{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "# Load the source CSV into `df`; the next cell cleans its columns in place.\n",
    "RAW_CSV_PATH = './mjd_xinjiangxing.csv'\n",
    "df = pd.read_csv(RAW_CSV_PATH)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\kit\\AppData\\Local\\Temp/ipykernel_15636/782824815.py:4: FutureWarning: The default value of regex will change from True to False in a future version.\n",
      "  df['comments'] = df['comments'].str.replace('万\\+条评价','0000')\n"
     ]
    }
   ],
   "source": [
    "# Clean the listing columns in place, then export the tidy subset to CSV.\n",
    "\n",
    "# Keep the original cell text once. The guard makes re-running this cell safe:\n",
    "# 'details_urls' is overwritten below, so an unguarded copy would lose the source text.\n",
    "if 'html_datas' not in df.columns:\n",
    "    df['html_datas'] = df['details_urls']\n",
    "\n",
    "# expand=False yields a Series rather than a one-column DataFrame.\n",
    "df['details_urls'] = df['html_datas'].str.extract(r'tourl=\"(.*?)\" shopurl=\"', expand=False)\n",
    "\n",
    "# Raw string avoids an invalid escape sequence; rows with no decimal number become NaN.\n",
    "df['price'] = df['price'].str.extract(r'([0-9]+\\.[0-9]+)', expand=False)\n",
    "\n",
    "# Normalise review counts to digit strings, e.g. '2万+条评价' -> '20000', '暂无评价' -> '0'.\n",
    "# The literal is matched with an explicit regex=False: the implicit regex=True default is\n",
    "# deprecated, and an escaped pattern would never match once the default becomes False.\n",
    "# NOTE(review): a fractional value such as '1.2万+条评价' would become '1.20000' -- confirm none occur.\n",
    "review_counts = df['comments'].str.replace('万+条评价', '0000', regex=False)\n",
    "review_counts = review_counts.str.replace('暂无评价', '0', regex=False)\n",
    "# strip() removes any of these characters from both ends, which also covers a bare '条评价'.\n",
    "df['comments'] = review_counts.str.strip('+条评价')\n",
    "\n",
    "export_columns = ['title', 'price', 'comments', 'details_urls', 'shoop']\n",
    "df[export_columns].to_csv('京东_新疆杏销售数据.csv', index=False, encoding='utf-8')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "interpreter": {
   "hash": "c3ac3f88986c03f5f4ea9b167d463bfb61b25cb597313235fabe727a0f7e0da2"
  },
  "kernelspec": {
   "display_name": "Python 3.9.5 64-bit ('base': conda)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.5"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
